Background

See CausalInferenceA

Introduction

Data

Load Data

harvest.2013.dat <- read.csv(file='./data/A 2013 Soybeans Harvest.csv')
harvest.2015.dat <- read.csv(file='./data/A 2015 Wheat Harvest.csv')
harvest.2018.dat <- read.csv(file='./data/A 2018 Corn Harvest.csv')
harvest.2016.dat <- read.csv(file='./data/A 2016 Corn Harvest.csv')
harvest.2017.dat <- read.csv(file='./data/A 2017 Soybeans Harvest.csv')
seed.2018.dat <-read.csv(file='./data/A 2018 Corn Seeding.csv')
harvest.2019.dat <- read.csv(file='./data/A 2019 Soybeans Harvest.csv')
seed.2020.dat <-read.csv(file='./data/A 2020 Corn Seeding.csv')
harvest.2020.dat <-read.csv(file='./data/A 2020 Corn Harvest.csv')

Screen for outliers.

seed.2018.dat <- remove.seed.outliers.fn(seed.2018.dat)
seed.2020.dat <- remove.seed.outliers.fn(seed.2020.dat)
harvest.2020.dat <- remove.harvest.outliers.fn(harvest.2020.dat)
harvest.2019.dat <- remove.harvest.outliers.fn(harvest.2019.dat)
harvest.2018.dat <- remove.harvest.outliers.fn(harvest.2018.dat)
harvest.2017.dat <- remove.harvest.outliers.fn(harvest.2017.dat)
harvest.2013.dat <- remove.harvest.outliers.fn(harvest.2013.dat)
harvest.2015.dat <- remove.harvest.outliers.fn(harvest.2015.dat)
harvest.2016.dat <- remove.harvest.outliers.fn(harvest.2016.dat)
grid.size=50
all.harvest.dat <- rbind(harvest.2013.dat,
                         harvest.2015.dat,
                         harvest.2016.dat,
                         harvest.2017.dat,
                         harvest.2018.dat,
                         harvest.2019.dat,
                         harvest.2020.dat)
all.harvest.dat$LatCell <- as.factor(floor(all.harvest.dat$Latitude/grid.size))
all.harvest.dat$LonCell <- as.factor(floor(all.harvest.dat$Longitude/grid.size))

all.harvest.dat$Cell <- all.harvest.dat$LonCell:all.harvest.dat$LatCell

common.dat <- aggregate(Longitude ~ Cell, data = all.harvest.dat,mean)
row.names(common.dat) <- common.dat$Cell
Lats <- aggregate(Latitude ~ Cell, data = all.harvest.dat,mean)
row.names(Lats) <- Lats$Cell

Counts <- aggregate(Yield ~ Cell, data = all.harvest.dat,length)
row.names(Counts) <- Counts$Cell

head(Counts)
##      Cell Yield
## 0:7   0:7   399
## 0:8   0:8   683
## 0:9   0:9   668
## 0:10 0:10   719
## 0:11 0:11   716
## 0:12 0:12   230
common.dat$Latitude <- Lats[row.names(common.dat),'Latitude']
common.dat$Counts <- Counts[row.names(common.dat),'Yield']
head(common.dat)
##      Cell Longitude Latitude Counts
## 0:7   0:7  28.43886 382.1674    399
## 0:8   0:8  27.94464 425.2921    683
## 0:9   0:9  26.94234 474.8385    668
## 0:10 0:10  27.99323 525.3938    719
## 0:11 0:11  27.05665 574.8760    716
## 0:12 0:12  28.71218 612.3923    230
common.dat <- common.dat[common.dat$Count>4,]

See CausalInferenceA for models, and YieldDataModels/ModelSelection for model choice

Here, we use bs with 12 df

#basis splines

df <- 12
l1 <- lm(Yield ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2018.dat)
common.dat$Y18 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in predict.lm(l1, newdata = common.dat[, c("Longitude", "Latitude")]):
## prediction from a rank-deficient fit may be misleading
hist(common.dat$Y18)

l1 <- lm(Yield ~bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2017.dat)
common.dat$Y17 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in predict.lm(l1, newdata = common.dat[, c("Longitude", "Latitude")]):
## prediction from a rank-deficient fit may be misleading
l1 <- lm(Yield ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2016.dat)
common.dat$Y16 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in predict.lm(l1, newdata = common.dat[, c("Longitude", "Latitude")]):
## prediction from a rank-deficient fit may be misleading
l1 <- lm(Yield ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2015.dat)
common.dat$Y15 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 190.210966603871, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 190.210966603871, :
## prediction from a rank-deficient fit may be misleading
l1 <- lm(Yield ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2013.dat)
common.dat$Y13 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 208.210411192895, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 208.210411192895, :
## prediction from a rank-deficient fit may be misleading
l1 <- lm(Yield ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2019.dat)
common.dat$Y19 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 208.07298058378, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 208.07298058378, :
## prediction from a rank-deficient fit may be misleading
l1 <- lm(Yield ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=harvest.2020.dat)
common.dat$Y20 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in predict.lm(l1, newdata = common.dat[, c("Longitude", "Latitude")]):
## prediction from a rank-deficient fit may be misleading
l1 <- lm(AppliedRate ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=seed.2018.dat)
common.dat$AR18 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in predict.lm(l1, newdata = common.dat[, c("Longitude", "Latitude")]):
## prediction from a rank-deficient fit may be misleading
l1 <- lm(ControlRate ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=seed.2018.dat)
common.dat$R18 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in predict.lm(l1, newdata = common.dat[, c("Longitude", "Latitude")]):
## prediction from a rank-deficient fit may be misleading
l1 <- lm(ControlRate ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=seed.2020.dat)
common.dat$R20 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 193.468573301737, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 193.468573301737, :
## prediction from a rank-deficient fit may be misleading
l1 <- lm(AppliedRate ~ bs(Longitude,df=df)*bs(Latitude,df=df),data=seed.2020.dat)
common.dat$AR20 <- predict(l1,newdata=common.dat[,c("Longitude","Latitude")])
## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 193.468573301737, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

## Warning in bs(Longitude, degree = 3L, knots = c(`10%` = 193.468573301737, :
## prediction from a rank-deficient fit may be misleading
common.dat$Y13r <- rank(common.dat$Y13)
common.dat$Y13r <- common.dat$Y13r/max(common.dat$Y13r)

common.dat$Y15r <- rank(common.dat$Y15)
common.dat$Y15r <- common.dat$Y15r/max(common.dat$Y15r)

Maps <- data.frame(Longitude=c(common.dat$Longitude,common.dat$Longitude),
                   Latitude=c(common.dat$Latitude,common.dat$Latitude),
                   Value=c(common.dat$Y13r,common.dat$Y15r),
                   Map=c(rep('Y13r',length(common.dat$Y13r)),
                         rep('Y15r',length(common.dat$Y15r))))
ggplot(Maps, aes(Longitude,Latitude)) + 
geom_point(aes(colour = Value),size=0.5) + 
scale_colour_gradient2(low=vermillion, mid=yellow, high=blue, midpoint = 0.5) +
labs(colour = "Relative Rank", x="Easting", y="Northing", title = "Seeding and Yield (2018)") + facet_wrap(~ Map)

Model 4

model4.dag <- model2network("[Y17][R18|Y17][AR18|R18][Y18|AR18:Y17]")
graphviz.plot(model4.dag)
fit1 = bn.fit(model4.dag, common.dat[,c('Y18','R18','Y17','AR18')])
fit1
strength4 <- arc.strength(model4.dag, common.dat[,c('Y18','R18','Y17','AR18')])

strength.plot(model4.dag, strength4)


bf.strength4 <- bf.strength(model4.dag, common.dat[,c('Y18','R18','Y17','AR18')])
strength.plot(model4.dag, bf.strength4)
averaged.network(bf.strength4)
plot(bf.strength4)

strength4
bf.strength4
learned.dag = iamb(common.dat[,c('Y18','R18','Y17','AR18')])
graphviz.plot(learned.dag)
#learned.dag = set.arc(learned.dag, from = "Y17", to = "Y18")
##learned.dag = set.arc(learned.dag, from = "Y17", to = "R18")
#learned.dag = set.arc(learned.dag, from = "R18", to = "Y18")
#graphviz.plot(learned.dag)
#fit1 = bn.fit(learned.dag, common.dat[,c('Y18','R18','Y17')])
#fit1

Model 5 and 6

model6_dag <- dagify(Y18 ~ AR18,
       Y18 ~ Y17,
       AR18 ~ R18,
       R18 ~ RS,
       Y18 ~ RS,
       Y17 ~ Y16,
       Y16 ~ Y15,
       Y15 ~ RS,
       Y16 ~ RS,
       Y17 ~ RS,
       labels = c("Y18" = "Yield\n 2018", 
                  "R18" = "Control Rate",
                  "Y17" = "Yield \n2017"),
       latent = "RS",
       #exposure = "smoking",
       outcome = "Y18")
#ggdag(model6_dag, text = TRUE, use_labels = "label")
ggdag(model6_dag, text = TRUE)
require(mgcv)
k=160
s=2^(-5)
useGAM = FALSE
#nearly every path is significant
KRIG = TRUE
LM = FALSE

d=21
gam.dat <- seed.2018.dat

  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2018.dat)
  gam.dat$Y18 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])

  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2017.dat)
  gam.dat$Y17 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])

  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2016.dat)
  gam.dat$Y16 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])

  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2015.dat)
  gam.dat$Y15 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])

  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2013.dat)
  gam.dat$Y13 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])
  
  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2019.dat)
  gam.dat$Y19 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])
 
  b1 <- gam(ControlRate ~ s(Longitude,Latitude,k=k),data=seed.2020.dat)
  gam.dat$R20 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])
  
  b1 <- gam(Yield ~ s(Longitude,Latitude,k=k),data=harvest.2020.dat)
  gam.dat$Y20 <- predict(b1,newdata=gam.dat[,c("Longitude","Latitude")])
# natural splines
i=9
ns.dat <- seed.2018.dat  

l1 <- lm(Yield ~ ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=harvest.2018.dat)
ns.dat$Y18 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])

l1 <- lm(Yield ~ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=harvest.2017.dat)
ns.dat$Y17 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])

l1 <- lm(Yield ~ ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=harvest.2016.dat)
ns.dat$Y16 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])

l1 <- lm(Yield ~ ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=harvest.2015.dat)
ns.dat$Y15 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])

l1 <- lm(Yield ~ ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=harvest.2013.dat)
ns.dat$Y13 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])
  
l1 <- lm(Yield ~ ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=harvest.2019.dat)
ns.dat$Y19 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])
  
l1 <- lm(ControlRate ~ ns(Longitude,df=1+2*(i-1))*ns(Latitude,df=1+2*(i-1)),data=seed.2020.dat)
ns.dat$R20 <- predict(l1,newdata=ns.dat[,c("Longitude","Latitude")])
map.dat <- common.dat
map.dat$Y19 <- rank(map.dat$Y19)
map.dat$Y19 <- map.dat$Y19/max(map.dat$Y19)
map.dat$Y18 <- rank(map.dat$Y18)
map.dat$Y18 <- map.dat$Y18/max(map.dat$Y18)
map.dat$Y17 <- rank(map.dat$Y17)
map.dat$Y17 <- map.dat$Y17/max(map.dat$Y17)

map.dat$Y16 <- rank(map.dat$Y16)
map.dat$Y16 <- map.dat$Y16/max(map.dat$Y16)
map.dat$Y15 <- rank(map.dat$Y15)
map.dat$Y15 <- map.dat$Y15/max(map.dat$Y15)
map.dat$Y13 <- rank(map.dat$Y13)
map.dat$Y13 <- map.dat$Y13/max(map.dat$Y13)

Maps6 <- data.frame(Longitude=c(map.dat$Longitude,
                                map.dat$Longitude,
                                map.dat$Longitude,
                                map.dat$Longitude,
                                map.dat$Longitude,
                                map.dat$Longitude),
                   Latitude=c(map.dat$Latitude,
                              map.dat$Latitude,
                              map.dat$Latitude,
                              map.dat$Latitude,
                              map.dat$Latitude,
                              map.dat$Latitude),
                   Value=c(map.dat$Y19,
                           map.dat$Y18,
                           map.dat$Y17,
                           map.dat$Y16,
                           map.dat$Y15,
                           map.dat$Y13),
                   Map=c(rep(2019,length(map.dat$Y19)),
                         rep(2018,length(map.dat$Y18)),
                         rep(2017,length(map.dat$Y17)),
                         rep(2016,length(map.dat$Y16)),
                         rep(2015,length(map.dat$Y15)),
                         rep(2013,length(map.dat$Y13))))
ggplot(Maps6, aes(Longitude,Latitude)) + 
geom_point(aes(colour = Value),size=.5) + 
scale_colour_gradient2(low=vermillion, mid=yellow, high=blue, midpoint = 0.5) +
labs(colour = "Relative Rank", x="Easting", y="Northing", title = "Multiple Years") + facet_wrap(~ Map)

mapped.dat <- common.dat
model5.dat <- common.dat[,c('Y18',"R18",'AR18',"Y17",'Y16','Y15')]
model5.dag <- model2network("[Y15][Y16|Y15][Y17|Y16][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17]")
#graphviz.plot(model5.dag,layout = "dot")
#graphviz.plot(model5.dag,layout = "fdp")
graphviz.plot(model5.dag,layout = "circo")
## Loading required namespace: Rgraphviz

model5.fit = bn.fit(model5.dag, model5.dat)
model5.fit
## 
##   Bayesian network parameters
## 
##   Parameters of node AR18 (Gaussian distribution)
## 
## Conditional density: AR18 | R18
## Coefficients:
## (Intercept)          R18  
## 7819.095715     0.708718  
## Standard deviation of the residuals: 595.3269 
## 
##   Parameters of node R18 (Gaussian distribution)
## 
## Conditional density: R18 | Y15 + Y16 + Y17
## Coefficients:
##  (Intercept)           Y15           Y16           Y17  
## 13170.310219     11.688495     -3.943456    238.455726  
## Standard deviation of the residuals: 810.0274 
## 
##   Parameters of node Y15 (Gaussian distribution)
## 
## Conditional density: Y15
## Coefficients:
## (Intercept)  
##    34.29315  
## Standard deviation of the residuals: 10.88382 
## 
##   Parameters of node Y16 (Gaussian distribution)
## 
## Conditional density: Y16 | Y15
## Coefficients:
## (Intercept)          Y15  
## 120.8531964    0.1483424  
## Standard deviation of the residuals: 12.83198 
## 
##   Parameters of node Y17 (Gaussian distribution)
## 
## Conditional density: Y17 | Y16
## Coefficients:
## (Intercept)          Y16  
## 50.58682189   0.05594438  
## Standard deviation of the residuals: 2.50968 
## 
##   Parameters of node Y18 (Gaussian distribution)
## 
## Conditional density: Y18 | AR18 + Y17
## Coefficients:
##   (Intercept)           AR18            Y17  
## -109.49460929     0.01168506     0.48117522  
## Standard deviation of the residuals: 27.09955
#bn.fit.qqplot(model5.fit)
strength5 <- arc.strength(model5.dag, model5.dat)

strength.plot(model5.dag, strength5,layout = "circo")

bf.strength5 <- bf.strength(model5.dag, model5.dat)
## Loading required namespace: Rmpfr
strength.plot(model5.dag, bf.strength5)

averaged.network(bf.strength5)
## 
##   Random/Generated Bayesian network
## 
##   model:
##    [R18][Y15][Y16][Y17|R18:Y15:Y16][Y18|R18][AR18|R18:Y17] 
##   nodes:                                 6 
##   arcs:                                  6 
##     undirected arcs:                     0 
##     directed arcs:                       6 
##   average markov blanket size:           3.00 
##   average neighbourhood size:            2.00 
##   average branching factor:              1.00 
## 
##   generation algorithm:                  Model Averaging 
##   significance threshold:                0.4057388
plot(bf.strength5)

strength5
##   from   to     strength
## 1  Y15  Y16 5.496561e-02
## 2  Y16  Y17 1.574890e-05
## 3  Y17  R18 1.741050e-22
## 4  Y16  R18 3.561959e-01
## 5  Y15  R18 2.259245e-02
## 6  R18 AR18 3.115335e-49
## 7 AR18  Y18 1.856984e-05
## 8  Y17  Y18 6.205303e-01
bf.strength5
##    from   to     strength    direction
## 1  AR18  R18 1.000000e+00 3.643412e-24
## 2  AR18  Y15 2.959340e-06 0.000000e+00
## 3  AR18  Y16 3.009519e-06 0.000000e+00
## 4  AR18  Y17 1.000000e+00 0.000000e+00
## 5  AR18  Y18 4.057388e-01 1.853371e-02
## 6   R18 AR18 1.000000e+00 1.000000e+00
## 7   R18  Y15 2.786016e-05 0.000000e+00
## 8   R18  Y16 2.419826e-06 0.000000e+00
## 9   R18  Y17 1.000000e+00 9.992223e-01
## 10  R18  Y18 1.000000e+00 1.000000e+00
## 11  Y15 AR18 2.959340e-06 1.000000e+00
## 12  Y15  R18 2.786016e-05 1.000000e+00
## 13  Y15  Y16 1.942213e-03 5.000000e-01
## 14  Y15  Y17 9.028457e-01 1.000000e+00
## 15  Y15  Y18 5.635342e-05 1.000000e+00
## 16  Y16 AR18 3.009519e-06 1.000000e+00
## 17  Y16  R18 2.419826e-06 1.000000e+00
## 18  Y16  Y15 1.942213e-03 5.000000e-01
## 19  Y16  Y17 9.126284e-01 8.466694e-01
## 20  Y16  Y18 6.804326e-05 1.000000e+00
## 21  Y17 AR18 1.000000e+00 1.000000e+00
## 22  Y17  R18 1.000000e+00 7.777172e-04
## 23  Y17  Y15 9.028457e-01 0.000000e+00
## 24  Y17  Y16 9.126284e-01 1.533306e-01
## 25  Y17  Y18 4.041052e-04 1.000000e+00
## 26  Y18 AR18 4.057388e-01 9.814663e-01
## 27  Y18  R18 1.000000e+00 0.000000e+00
## 28  Y18  Y15 5.635342e-05 0.000000e+00
## 29  Y18  Y16 6.804326e-05 0.000000e+00
## 30  Y18  Y17 4.041052e-04 0.000000e+00
model6.dat <- common.dat[,c('Y18',"R18",'AR18',"Y17",'Y16','Y15')]
model6.dag <- model2network("[Y15][Y16|Y15][Y17|Y16:Y15][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17:Y16:Y15]")
model6.dag
## 
##   Random/Generated Bayesian network
## 
##   model:
##    [Y15][Y16|Y15][Y17|Y15:Y16][R18|Y15:Y16:Y17][AR18|R18][Y18|AR18:Y15:Y16:Y17] 
##   nodes:                                 6 
##   arcs:                                  11 
##     undirected arcs:                     0 
##     directed arcs:                       11 
##   average markov blanket size:           4.67 
##   average neighbourhood size:            3.67 
##   average branching factor:              1.83 
## 
##   generation algorithm:                  Empty
model6.fit = bn.fit(model6.dag, model6.dat)
model6.fit
## 
##   Bayesian network parameters
## 
##   Parameters of node AR18 (Gaussian distribution)
## 
## Conditional density: AR18 | R18
## Coefficients:
## (Intercept)          R18  
## 7819.095715     0.708718  
## Standard deviation of the residuals: 595.3269 
## 
##   Parameters of node R18 (Gaussian distribution)
## 
## Conditional density: R18 | Y15 + Y16 + Y17
## Coefficients:
##  (Intercept)           Y15           Y16           Y17  
## 13170.310219     11.688495     -3.943456    238.455726  
## Standard deviation of the residuals: 810.0274 
## 
##   Parameters of node Y15 (Gaussian distribution)
## 
## Conditional density: Y15
## Coefficients:
## (Intercept)  
##    34.29315  
## Standard deviation of the residuals: 10.88382 
## 
##   Parameters of node Y16 (Gaussian distribution)
## 
## Conditional density: Y16 | Y15
## Coefficients:
## (Intercept)          Y15  
## 120.8531964    0.1483424  
## Standard deviation of the residuals: 12.83198 
## 
##   Parameters of node Y17 (Gaussian distribution)
## 
## Conditional density: Y17 | Y15 + Y16
## Coefficients:
## (Intercept)          Y15          Y16  
## 49.24033167   0.06409793   0.04918221  
## Standard deviation of the residuals: 2.417094 
## 
##   Parameters of node Y18 (Gaussian distribution)
## 
## Conditional density: Y18 | AR18 + Y15 + Y16 + Y17
## Coefficients:
##   (Intercept)           AR18            Y15            Y16            Y17  
## -116.85558972     0.01190639     0.02536271     0.12130809     0.22570330  
## Standard deviation of the residuals: 27.1727
#bn.fit.qqplot(model6.fit)
strength6 <- arc.strength(model6.dag, model6.dat)

strength.plot(model6.dag, strength6)

bf.strength6 <- bf.strength(model6.dag, model6.dat)

strength.plot(model6.dag, bf.strength6)

averaged.network(bf.strength6)
## 
##   Random/Generated Bayesian network
## 
##   model:
##     [partially directed graph]
##   nodes:                                 6 
##   arcs:                                  6 
##     undirected arcs:                     1 
##     directed arcs:                       5 
##   average markov blanket size:           2.00 
##   average neighbourhood size:            2.00 
##   average branching factor:              0.83 
## 
##   generation algorithm:                  Model Averaging 
##   significance threshold:                0.406479
plot(bf.strength6)

strength6
##    from   to     strength
## 1   Y15  Y16 5.496561e-02
## 2   Y16  Y17 8.711057e-05
## 3   Y15  Y17 1.721409e-05
## 4   Y17  R18 1.741050e-22
## 5   Y16  R18 3.561959e-01
## 6   Y15  R18 2.259245e-02
## 7   R18 AR18 3.115335e-49
## 8  AR18  Y18 1.513771e-05
## 9   Y17  Y18 8.263446e-01
## 10  Y16  Y18 3.997607e-01
## 11  Y15  Y18 8.820933e-01
bf.strength6
##    from   to     strength    direction
## 1  AR18  R18 1.000000e+00 3.643412e-24
## 2  AR18  Y15 2.959340e-06 0.000000e+00
## 3  AR18  Y16 3.009519e-06 0.000000e+00
## 4  AR18  Y17 1.000000e+00 0.000000e+00
## 5  AR18  Y18 4.064790e-01 2.154133e-02
## 6   R18 AR18 1.000000e+00 1.000000e+00
## 7   R18  Y15 2.786016e-05 0.000000e+00
## 8   R18  Y16 2.419826e-06 0.000000e+00
## 9   R18  Y17 1.000000e+00 5.000000e-01
## 10  R18  Y18 1.000000e+00 1.000000e+00
## 11  Y15 AR18 2.959340e-06 1.000000e+00
## 12  Y15  R18 2.786016e-05 1.000000e+00
## 13  Y15  Y16 1.942213e-03 5.000000e-01
## 14  Y15  Y17 9.028457e-01 1.000000e+00
## 15  Y15  Y18 5.091052e-05 1.000000e+00
## 16  Y16 AR18 3.009519e-06 1.000000e+00
## 17  Y16  R18 2.419826e-06 1.000000e+00
## 18  Y16  Y15 1.942213e-03 5.000000e-01
## 19  Y16  Y17 7.620855e-01 5.000000e-01
## 20  Y16  Y18 6.147137e-05 1.000000e+00
## 21  Y17 AR18 1.000000e+00 1.000000e+00
## 22  Y17  R18 1.000000e+00 5.000000e-01
## 23  Y17  Y15 9.028457e-01 0.000000e+00
## 24  Y17  Y16 7.620855e-01 5.000000e-01
## 25  Y17  Y18 3.103096e-04 1.000000e+00
## 26  Y18 AR18 4.064790e-01 9.784587e-01
## 27  Y18  R18 1.000000e+00 0.000000e+00
## 28  Y18  Y15 5.091052e-05 0.000000e+00
## 29  Y18  Y16 6.147137e-05 0.000000e+00
## 30  Y18  Y17 3.103096e-04 0.000000e+00
bn.cv(model5.dat, model5.dag)
## 
##   k-fold cross-validation for Bayesian networks
## 
##   target network structure:
##    [Y15][Y16|Y15][Y17|Y16][R18|Y15:Y16:Y17][AR18|R18][Y18|AR18:Y17] 
##   number of folds:                       10 
##   loss function:                         Log-Likelihood Loss (Gauss.) 
##   expected loss:                         32.30852
bn.cv(model6.dat, model6.dag)
## 
##   k-fold cross-validation for Bayesian networks
## 
##   target network structure:
##    [Y15][Y16|Y15][Y17|Y15:Y16][R18|Y15:Y16:Y17][AR18|R18][Y18|AR18:Y15:Y16:Y17] 
##   number of folds:                       10 
##   loss function:                         Log-Likelihood Loss (Gauss.) 
##   expected loss:                         32.3357
#VarCorr(default.gam)
#learned.dag = iamb(common.dat[,c('Y18',"R18","Y17","Y16","Y15")])
learned.dag = iamb(common.dat[,c('Y18','AR18',"R18","Y17","Y16","Y15","Y13")])
graphviz.plot(learned.dag)
model9.dag <- model2network("[Y15][Y16|Y15][Y17|Y16:Y15][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17:Y16:Y15][Y19|Y18][R20|Y19:Y18:Y17:Y16:Y15]")

model9.fit = bn.fit(model9.dag, common.dat[,c('R20','Y19','Y18',"R18",'AR18',"Y17",'Y16','Y15')])
model9.fit
bn.fit.qqplot(model9.fit)
strength9 <- arc.strength(model9.dag, mapped.dat[,c('R20','Y19','Y18',"R18",'AR18',"Y17",'Y16','Y15')])
strength9
strength.plot(model9.dag, strength9)
model9.dat <- common.dat[,c('Y20','R20','Y19','Y18',"R18",'AR18',"Y17",'Y16','Y15')]
model9.dag <- model2network("[Y15][Y16|Y15][Y17|Y16:Y15][R18|Y17:Y16:Y15][AR18|R18][Y18|AR18:Y17:Y16:Y15][Y19|Y18:Y17:Y16:Y15][R20|Y19:Y18:Y17:Y16:Y15][Y20|R20:Y19:Y18:Y17:Y16:Y15]")

model9.fit = bn.fit(model9.dag, model9.dat)
model9.fit
## 
##   Bayesian network parameters
## 
##   Parameters of node AR18 (Gaussian distribution)
## 
## Conditional density: AR18 | R18
## Coefficients:
## (Intercept)          R18  
## 7819.095715     0.708718  
## Standard deviation of the residuals: 595.3269 
## 
##   Parameters of node R18 (Gaussian distribution)
## 
## Conditional density: R18 | Y15 + Y16 + Y17
## Coefficients:
##  (Intercept)           Y15           Y16           Y17  
## 13170.310219     11.688495     -3.943456    238.455726  
## Standard deviation of the residuals: 810.0274 
## 
##   Parameters of node R20 (Gaussian distribution)
## 
## Conditional density: R20 | Y15 + Y16 + Y17 + Y18 + Y19
## Coefficients:
##  (Intercept)           Y15           Y16           Y17           Y18  
## 15836.225634      8.362858     -2.394976   -140.813604     50.018882  
##          Y19  
##   179.003682  
## Standard deviation of the residuals: 1023.939 
## 
##   Parameters of node Y15 (Gaussian distribution)
## 
## Conditional density: Y15
## Coefficients:
## (Intercept)  
##    34.29315  
## Standard deviation of the residuals: 10.88382 
## 
##   Parameters of node Y16 (Gaussian distribution)
## 
## Conditional density: Y16 | Y15
## Coefficients:
## (Intercept)          Y15  
## 120.8531964    0.1483424  
## Standard deviation of the residuals: 12.83198 
## 
##   Parameters of node Y17 (Gaussian distribution)
## 
## Conditional density: Y17 | Y15 + Y16
## Coefficients:
## (Intercept)          Y15          Y16  
## 49.24033167   0.06409793   0.04918221  
## Standard deviation of the residuals: 2.417094 
## 
##   Parameters of node Y18 (Gaussian distribution)
## 
## Conditional density: Y18 | AR18 + Y15 + Y16 + Y17
## Coefficients:
##   (Intercept)           AR18            Y15            Y16            Y17  
## -116.85558972     0.01190639     0.02536271     0.12130809     0.22570330  
## Standard deviation of the residuals: 27.1727 
## 
##   Parameters of node Y19 (Gaussian distribution)
## 
## Conditional density: Y19 | Y15 + Y16 + Y17 + Y18
## Coefficients:
## (Intercept)          Y15          Y16          Y17          Y18  
## 27.56697363  -0.08280492  -0.05653656   0.79320593  -0.07986473  
## Standard deviation of the residuals: 5.401567 
## 
##   Parameters of node Y20 (Gaussian distribution)
## 
## Conditional density: Y20 | R20 + Y15 + Y16 + Y17 + Y18 + Y19
## Coefficients:
##  (Intercept)           R20           Y15           Y16           Y17  
## -159.7271569     0.0116918     0.2581445     0.2919652    -0.2172005  
##          Y18           Y19  
##    0.2242037    -1.3614663  
## Standard deviation of the residuals: 9.04632
bn.fit.qqplot(model9.fit)

bf.strength9 <- bf.strength(model9.dag, model9.dat)

strength.plot(model9.dag, bf.strength9)

averaged.network(bf.strength9)
## Warning in averaged.network.backend(strength = strength, nodes = nodes, : arc
## R20 -> Y19 would introduce cycles in the graph, ignoring.
## 
##   Random/Generated Bayesian network
## 
##   model:
##     [partially directed graph]
##   nodes:                                 9 
##   arcs:                                  18 
##     undirected arcs:                     1 
##     directed arcs:                       17 
##   average markov blanket size:           5.56 
##   average neighbourhood size:            4.00 
##   average branching factor:              1.89 
## 
##   generation algorithm:                  Model Averaging 
##   significance threshold:                0.5340344
plot(bf.strength9)

bf.strength9 
##    from   to     strength    direction
## 1  AR18  R18 1.000000e+00 3.643412e-24
## 2  AR18  R20 3.437085e-07 1.000000e+00
## 3  AR18  Y15 2.959340e-06 0.000000e+00
## 4  AR18  Y16 3.009519e-06 0.000000e+00
## 5  AR18  Y17 1.000000e+00 0.000000e+00
## 6  AR18  Y18 4.064790e-01 2.154133e-02
## 7  AR18  Y19 9.997348e-01 1.000000e+00
## 8  AR18  Y20 5.340344e-01 1.000000e+00
## 9   R18 AR18 1.000000e+00 1.000000e+00
## 10  R18  R20 1.000000e+00 1.000000e+00
## 11  R18  Y15 2.786016e-05 0.000000e+00
## 12  R18  Y16 2.419826e-06 0.000000e+00
## 13  R18  Y17 1.000000e+00 5.000000e-01
## 14  R18  Y18 1.000000e+00 1.000000e+00
## 15  R18  Y19 9.999876e-01 1.000000e+00
## 16  R18  Y20 6.726247e-06 1.000000e+00
## 17  R20 AR18 3.437085e-07 0.000000e+00
## 18  R20  R18 1.000000e+00 0.000000e+00
## 19  R20  Y15 2.969466e-06 0.000000e+00
## 20  R20  Y16 1.155983e-06 0.000000e+00
## 21  R20  Y17 2.106598e-01 0.000000e+00
## 22  R20  Y18 1.000000e+00 0.000000e+00
## 23  R20  Y19 1.000000e+00 5.000000e-01
## 24  R20  Y20 1.000000e+00 5.000000e-01
## 25  Y15 AR18 2.959340e-06 1.000000e+00
## 26  Y15  R18 2.786016e-05 1.000000e+00
## 27  Y15  R20 2.969466e-06 1.000000e+00
## 28  Y15  Y16 1.942213e-03 5.000000e-01
## 29  Y15  Y17 9.028457e-01 1.000000e+00
## 30  Y15  Y18 5.091052e-05 1.000000e+00
## 31  Y15  Y19 5.444680e-03 1.000000e+00
## 32  Y15  Y20 7.793566e-01 1.000000e+00
## 33  Y16 AR18 3.009519e-06 1.000000e+00
## 34  Y16  R18 2.419826e-06 1.000000e+00
## 35  Y16  R20 1.155983e-06 1.000000e+00
## 36  Y16  Y15 1.942213e-03 5.000000e-01
## 37  Y16  Y17 7.620855e-01 5.000000e-01
## 38  Y16  Y18 6.147137e-05 1.000000e+00
## 39  Y16  Y19 1.644025e-03 1.000000e+00
## 40  Y16  Y20 9.998888e-01 1.000000e+00
## 41  Y17 AR18 1.000000e+00 1.000000e+00
## 42  Y17  R18 1.000000e+00 5.000000e-01
## 43  Y17  R20 2.106598e-01 1.000000e+00
## 44  Y17  Y15 9.028457e-01 0.000000e+00
## 45  Y17  Y16 7.620855e-01 5.000000e-01
## 46  Y17  Y18 3.103096e-04 1.000000e+00
## 47  Y17  Y19 9.986654e-01 1.000000e+00
## 48  Y17  Y20 8.877928e-04 1.000000e+00
## 49  Y18 AR18 4.064790e-01 9.784587e-01
## 50  Y18  R18 1.000000e+00 0.000000e+00
## 51  Y18  R20 1.000000e+00 1.000000e+00
## 52  Y18  Y15 5.091052e-05 0.000000e+00
## 53  Y18  Y16 6.147137e-05 0.000000e+00
## 54  Y18  Y17 3.103096e-04 0.000000e+00
## 55  Y18  Y19 1.000000e+00 1.509176e-06
## 56  Y18  Y20 9.998458e-01 1.000000e+00
## 57  Y19 AR18 9.997348e-01 0.000000e+00
## 58  Y19  R18 9.999876e-01 0.000000e+00
## 59  Y19  R20 1.000000e+00 5.000000e-01
## 60  Y19  Y15 5.444680e-03 0.000000e+00
## 61  Y19  Y16 1.644025e-03 0.000000e+00
## 62  Y19  Y17 9.986654e-01 0.000000e+00
## 63  Y19  Y18 1.000000e+00 9.999985e-01
## 64  Y19  Y20 1.000000e+00 1.000000e+00
## 65  Y20 AR18 5.340344e-01 0.000000e+00
## 66  Y20  R18 6.726247e-06 0.000000e+00
## 67  Y20  R20 1.000000e+00 5.000000e-01
## 68  Y20  Y15 7.793566e-01 0.000000e+00
## 69  Y20  Y16 9.998888e-01 0.000000e+00
## 70  Y20  Y17 8.877928e-04 0.000000e+00
## 71  Y20  Y18 9.998458e-01 0.000000e+00
## 72  Y20  Y19 1.000000e+00 0.000000e+00
#VarCorr(default.gam)
#learned.dag = iamb(common.dat[,c('Y18',"R18","Y17","Y16","Y15")])
learned.dag = iamb(mapped.dat[,c('R20','Y19','Y18','AR18',"R18","Y17","Y16","Y15","Y13")])
## Warning in vstruct.apply(arcs = arcs, vs = vs, nodes = nodes, debug = debug):
## vstructure Y19 -> AR18 <- R18 is not applicable, because one or both arcs are
## oriented in the opposite direction.
## Warning in vstruct.apply(arcs = arcs, vs = vs, nodes = nodes, debug = debug):
## vstructure Y19 -> R20 <- R18 is not applicable, because one or both arcs are
## oriented in the opposite direction.
graphviz.plot(learned.dag)

home.2017.krig <- krig.yield.fn(harvest.2017.dat[,c("Yield","Longitude","Latitude")],
                             seed.2018.dat[,c("Longitude","Latitude")],rng=20,idw=!krig)
home.krig <- krig.yield.fn(harvest.2018.dat[,c("Yield","Longitude","Latitude")],
                             seed.2018.dat[,c("Longitude","Latitude")],rng=20,idw=!krig)
home.2016.krig <- krig.yield.fn(harvest.2016.dat[,c("Yield","Longitude","Latitude")],
                             seed.2018.dat[,c("Longitude","Latitude")],rng=20,idw=!krig)
seed.2018.dat$Y17 <- home.2017.krig$Yield
seed.2018.dat$Y18 <- home.krig$Yield

if(!file.exists("home.Rda")) {
  home.krig <- krig.yield.fn(harvest.2018.dat[,c("Yield","Longitude","Latitude")],
                             seed.2018.dat[,c("Longitude","Latitude")],rng=60)
  home.2016.krig <- krig.yield.fn(harvest.2016.dat[,c("Yield","Longitude","Latitude")],
                             seed.2018.dat[,c("Longitude","Latitude")],rng=60)
  save(home.krig,home.2016.krig,file="home.Rda")
}

load(file="home.Rda")
 # plot(gamma ~ dist, data=home.krig$harvest.var,
#       ylim=c(0,max(home.krig$harvest.var$gamma)),col="blue")
 # abline(v=60)

2019 Response

GridLong <- reshape(common.dat, 
  varying = c('Y18','Y17','Y16','Y15','Y13','Y19'), 
  v.names = "Yield",
  timevar = "Year", 
  times = c(2018,2017,2016,2015,2013,2019), 
  direction = "long")
plot(Yield~Longitude,data=GridLong)

GridLong$Year <- factor(GridLong$Year)
ggplot(GridLong, aes(Longitude,Yield,color=Year)) + scale_colour_manual(values=cbPalette) +
geom_point(size=2) + geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Response plots

Plot achieved yield against actual applied rates versus prescribed rates.

Model2.lm <- lm(Y18 ~ Y17 + R18,data=mapped.dat)
summary(Model2.lm)
## 
## Call:
## lm(formula = Y18 ~ Y17 + R18, data = mapped.dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -232.391   -5.139    1.802    8.420   67.464 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.673e+02  3.608e+01  -7.409 2.33e-12 ***
## Y17         -2.331e+00  6.549e-01  -3.560  0.00045 ***
## R18          2.362e-02  1.642e-03  14.385  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.51 on 233 degrees of freedom
## Multiple R-squared:  0.5214, Adjusted R-squared:  0.5173 
## F-statistic: 126.9 on 2 and 233 DF,  p-value: < 2.2e-16
Model2.alt.lm <- lm(Y18 ~ R18 + Y17,data=mapped.dat)
summary(Model2.alt.lm)
## 
## Call:
## lm(formula = Y18 ~ R18 + Y17, data = mapped.dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -232.391   -5.139    1.802    8.420   67.464 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.673e+02  3.608e+01  -7.409 2.33e-12 ***
## R18          2.362e-02  1.642e-03  14.385  < 2e-16 ***
## Y17         -2.331e+00  6.549e-01  -3.560  0.00045 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.51 on 233 degrees of freedom
## Multiple R-squared:  0.5214, Adjusted R-squared:  0.5173 
## F-statistic: 126.9 on 2 and 233 DF,  p-value: < 2.2e-16
Model2.2.lm <- lm(Y18 ~ Y17,data=mapped.dat)
summary(Model2.2.lm)
## 
## Call:
## lm(formula = Y18 ~ Y17, data = mapped.dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -340.51   -3.83    4.92   11.94   51.40 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   29.032     40.616   0.715    0.475    
## Y17            3.516      0.704   4.995 1.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28.13 on 234 degrees of freedom
## Multiple R-squared:  0.09634,    Adjusted R-squared:  0.09248 
## F-statistic: 24.95 on 1 and 234 DF,  p-value: 1.153e-06
anova(Model2.2.lm)
## Analysis of Variance Table
## 
## Response: Y18
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Y17         1  19739 19738.5  24.947 1.153e-06 ***
## Residuals 234 185148   791.2                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model3.lm <- lm(Y18 ~ R18 + Y17+R18*Y17,data=mapped.dat)
Model3.alt.lm <- lm(Y18 ~ Y17 + R18 + R18*Y17,data=mapped.dat)
summary(Model3.lm)
## 
## Call:
## lm(formula = Y18 ~ R18 + Y17 + R18 * Y17, data = mapped.dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -205.043   -6.667    0.620    7.562   48.621 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.224e+03  6.745e+02  -6.262 1.83e-09 ***
## R18          1.743e-01  2.571e-02   6.781 9.80e-11 ***
## Y17          6.565e+01  1.159e+01   5.663 4.37e-08 ***
## R18:Y17     -2.587e-03  4.405e-04  -5.873 1.48e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.18 on 232 degrees of freedom
## Multiple R-squared:  0.5833, Adjusted R-squared:  0.5779 
## F-statistic: 108.3 on 3 and 232 DF,  p-value: < 2.2e-16
summary(Model3.alt.lm)
## 
## Call:
## lm(formula = Y18 ~ Y17 + R18 + R18 * Y17, data = mapped.dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -205.043   -6.667    0.620    7.562   48.621 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.224e+03  6.745e+02  -6.262 1.83e-09 ***
## Y17          6.565e+01  1.159e+01   5.663 4.37e-08 ***
## R18          1.743e-01  2.571e-02   6.781 9.80e-11 ***
## Y17:R18     -2.587e-03  4.405e-04  -5.873 1.48e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.18 on 232 degrees of freedom
## Multiple R-squared:  0.5833, Adjusted R-squared:  0.5779 
## F-statistic: 108.3 on 3 and 232 DF,  p-value: < 2.2e-16
anova(Model2.lm)
## Analysis of Variance Table
## 
## Response: Y18
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Y17         1  19739   19739   46.90 6.577e-11 ***
## R18         1  87087   87087  206.93 < 2.2e-16 ***
## Residuals 233  98061     421                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model2.alt.lm)
## Analysis of Variance Table
## 
## Response: Y18
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## R18         1 101492  101492 241.153 < 2.2e-16 ***
## Y17         1   5334    5334  12.673 0.0004495 ***
## Residuals 233  98061     421                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model3.lm)
## Analysis of Variance Table
## 
## Response: Y18
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## R18         1 101492  101492 275.813 < 2.2e-16 ***
## Y17         1   5334    5334  14.495 0.0001799 ***
## R18:Y17     1  12691   12691  34.489 1.475e-08 ***
## Residuals 232  85370     368                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(Model3.alt.lm)
## Analysis of Variance Table
## 
## Response: Y18
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Y17         1  19739   19739  53.641 3.941e-12 ***
## R18         1  87087   87087 236.667 < 2.2e-16 ***
## Y17:R18     1  12691   12691  34.489 1.475e-08 ***
## Residuals 232  85370     368                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(mapped.dat, aes(R18, Y18)) +
  geom_boxplot(aes(group = cut_width(R18, 500)), outlier.alpha = 0.1) +
geom_jitter(width = 50,alpha=0.1)

ggplot(mapped.dat, aes(R18,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'

ggplot(mapped.dat, aes(R18,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1,color=cbPalette[2]) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'

ggplot(mapped.dat, aes(Y17,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1,color=cbPalette[2]) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'

ggplot(mapped.dat, aes(Y17,R18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1,color=cbPalette[2]) + geom_smooth(se=FALSE,method='lm',color=grey)
## `geom_smooth()` using formula 'y ~ x'

ggplot(mapped.dat, aes(AR18,Y18)) + scale_color_manual(values=cbPalette) +
geom_point(size=1) + geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mapped.dat, aes(R18,Y18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
 geom_smooth()
ggplot(mapped.dat, aes(AR18,Y18)) +
geom_point(aes(color=Variety), size=1) + geom_smooth(aes(color=Variety)) + scale_color_manual(values=cbPalette)
ggplot(mapped.dat, aes(Y16,R18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
 geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mapped.dat, aes(Y16,Y18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
 geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mapped.dat, aes(Y17,Y18)) + geom_point(size=1) + scale_color_manual(values=cbPalette) +
 geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

TODO : optimize grid size

Generalized Additive Model

red.gam <- gam(Y18 ~  Y17, data=common.dat)
lm.gam <- gam(Y18 ~ R18 + Y17, data=common.dat)
gam.check(red.gam)

## 
## Method: GCV   Optimizer: magic
## Model required no smoothing parameter selectionModel rank =  2 / 2
gam.check(lm.gam)

## 
## Method: GCV   Optimizer: magic
## Model required no smoothing parameter selectionModel rank =  3 / 3
red.s.gam <- gam(Y18 ~ s(Y17), data=common.dat)
s.gam <- gam(Y18 ~ s(R18) + s(Y17), data=common.dat)
gam.check(red.s.gam)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradient at convergence was 0.0003138598 .
## The Hessian was positive definite.
## Model rank =  10 / 10 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##          k'  edf k-index p-value
## s(Y17) 9.00 1.05    1.02    0.53
gam.check(s.gam)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 8 iterations.
## The RMS GCV score gradient at convergence was 2.673539e-05 .
## The Hessian was positive definite.
## Model rank =  19 / 19 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##          k'  edf k-index p-value
## s(R18) 9.00 8.80    1.08    0.92
## s(Y17) 9.00 1.81    0.99    0.42
k9.gam <- gam(Y18 ~ s(R18,k=8) + s(Y17,k=8), data=common.dat)
summary(lm.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ R18 + Y17
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.673e+02  3.608e+01  -7.409 2.33e-12 ***
## R18          2.362e-02  1.642e-03  14.385  < 2e-16 ***
## Y17         -2.331e+00  6.549e-01  -3.560  0.00045 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.517   Deviance explained = 52.1%
## GCV = 426.28  Scale est. = 420.86    n = 236
summary(s.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18) + s(Y17)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 231.6885     0.7774     298   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df       F p-value    
## s(R18) 8.805  8.987 117.989  <2e-16 ***
## s(Y17) 1.814  2.312   0.642   0.498    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.836   Deviance explained = 84.4%
## GCV = 150.01  Scale est. = 142.62    n = 236
summary(k9.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18, k = 8) + s(Y17, k = 8)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 231.6885     0.8167   283.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df       F p-value    
## s(R18) 6.973  6.999 135.158  <2e-16 ***
## s(Y17) 1.881  2.398   0.775   0.405    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.819   Deviance explained = 82.6%
## GCV = 164.26  Scale est. = 157.4     n = 236
anova(lm.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ R18 + Y17
## 
## Parametric Terms:
##     df      F p-value
## R18  1 206.93 < 2e-16
## Y17  1  12.67 0.00045
anova(s.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18) + s(Y17)
## 
## Approximate significance of smooth terms:
##          edf Ref.df       F p-value
## s(R18) 8.805  8.987 117.989  <2e-16
## s(Y17) 1.814  2.312   0.642   0.498
anova(k9.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18, k = 8) + s(Y17, k = 8)
## 
## Approximate significance of smooth terms:
##          edf Ref.df       F p-value
## s(R18) 6.973  6.999 135.158  <2e-16
## s(Y17) 1.881  2.398   0.775   0.405
red.gam <- gam(Y18 ~  Y17 + Y16 + Y15 + Y13, data=common.dat)
lm.gam <- gam(Y18 ~ R18 + Y17 + Y16 + Y15 + Y13, data=common.dat)
gam.check(red.gam)

## 
## Method: GCV   Optimizer: magic
## Model required no smoothing parameter selectionModel rank =  5 / 5
gam.check(lm.gam)

## 
## Method: GCV   Optimizer: magic
## Model required no smoothing parameter selectionModel rank =  6 / 6
red.s.gam <- gam(Y18 ~ s(Y17) + s(Y16) + s(Y15) + s(Y13), data=common.dat)
s.gam <- gam(Y18 ~ s(R18) + s(Y17) + s(Y16) + s(Y15) + s(Y13), data=common.dat)
gam.check(red.s.gam)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 15 iterations.
## The RMS GCV score gradient at convergence was 0.0003430548 .
## The Hessian was positive definite.
## Model rank =  37 / 37 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##          k'  edf k-index p-value
## s(Y17) 9.00 2.43    1.03    0.67
## s(Y16) 9.00 5.08    0.93    0.15
## s(Y15) 9.00 8.76    1.09    0.89
## s(Y13) 9.00 7.75    0.96    0.28
gam.check(s.gam)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 23 iterations.
## The RMS GCV score gradient at convergence was 5.278616e-06 .
## The Hessian was positive definite.
## Model rank =  46 / 46 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##          k'  edf k-index p-value
## s(R18) 9.00 5.66    1.07    0.84
## s(Y17) 9.00 1.00    1.06    0.86
## s(Y16) 9.00 1.00    0.93    0.14
## s(Y15) 9.00 1.42    1.09    0.92
## s(Y13) 9.00 4.87    1.05    0.78
k9.gam <- gam(Y18 ~ s(R18,k=9) + s(Y17,k=9) + s(Y16,k=9) + s(Y15,k=9) + s(Y13,k=9), data=common.dat)
summary(lm.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ R18 + Y17 + Y16 + Y15 + Y13
## 
## Parametric coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -168.78696   33.74676  -5.002 1.13e-06 ***
## R18            0.01314    0.00179   7.344 3.57e-12 ***
## Y17           -0.18385    0.61286  -0.300    0.764    
## Y16           -0.11561    0.09450  -1.223    0.222    
## Y15           -0.58222    0.11413  -5.101 7.06e-07 ***
## Y13            2.36831    0.24155   9.805  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.663   Deviance explained =   67%
## GCV =  301.4  Scale est. = 293.74    n = 236
summary(s.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18) + s(Y17) + s(Y16) + s(Y15) + s(Y13)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 231.6885     0.6874   337.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df      F p-value    
## s(R18) 5.657  6.633 29.409  <2e-16 ***
## s(Y17) 1.000  1.000  0.268   0.605    
## s(Y16) 1.000  1.000  1.864   0.174    
## s(Y15) 1.424  1.733 24.170  <2e-16 ***
## s(Y13) 4.874  5.737 31.340  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.872   Deviance explained =   88%
## GCV = 119.05  Scale est. = 111.51    n = 236
summary(k9.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18, k = 9) + s(Y17, k = 9) + s(Y16, k = 9) + s(Y15, 
##     k = 9) + s(Y13, k = 9)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 231.6885     0.6871   337.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df      F p-value    
## s(R18) 5.703  6.534 30.054  <2e-16 ***
## s(Y17) 1.000  1.000  0.269   0.605    
## s(Y16) 1.000  1.000  1.870   0.173    
## s(Y15) 1.404  1.702 24.639  <2e-16 ***
## s(Y13) 4.754  5.541 29.447  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.872   Deviance explained =   88%
## GCV = 118.89  Scale est. = 111.4     n = 236
anova(lm.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ R18 + Y17 + Y16 + Y15 + Y13
## 
## Parametric Terms:
##     df      F  p-value
## R18  1 53.931 3.57e-12
## Y17  1  0.090    0.764
## Y16  1  1.497    0.222
## Y15  1 26.023 7.06e-07
## Y13  1 96.131  < 2e-16
anova(s.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18) + s(Y17) + s(Y16) + s(Y15) + s(Y13)
## 
## Approximate significance of smooth terms:
##          edf Ref.df      F p-value
## s(R18) 5.657  6.633 29.409  <2e-16
## s(Y17) 1.000  1.000  0.268   0.605
## s(Y16) 1.000  1.000  1.864   0.174
## s(Y15) 1.424  1.733 24.170  <2e-16
## s(Y13) 4.874  5.737 31.340  <2e-16
anova(k9.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Y18 ~ s(R18, k = 9) + s(Y17, k = 9) + s(Y16, k = 9) + s(Y15, 
##     k = 9) + s(Y13, k = 9)
## 
## Approximate significance of smooth terms:
##          edf Ref.df      F p-value
## s(R18) 5.703  6.534 30.054  <2e-16
## s(Y17) 1.000  1.000  0.269   0.605
## s(Y16) 1.000  1.000  1.870   0.173
## s(Y15) 1.404  1.702 24.639  <2e-16
## s(Y13) 4.754  5.541 29.447  <2e-16
plot(lm.gam,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)

plot(s.gam,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)

plot(k9.gam,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)

seed.2018.dat <- grid.field(seed.2018.dat)
cells.seed.dat$AR18 <- aggregate(AppliedRate ~ Row + Column,data=mapped.dat,FUN=mean,na.rm=TRUE)$AppliedRate
Model2_dag <- dagify(Y18 ~ R18,
       R18 ~ Y17,
       Y18 ~ Y17,
       labels = c("Y18" = "Yield\n 2018", 
                  "R18" = "Seeding Rate",
                  "Y17" = "Yield \n2017"),
       #latent = "unhealthy",
       #exposure = "smoking",
       outcome = "Y18")

ggdag(Model2_dag, text = FALSE, use_labels = "label")

Model2_dag <- dagify(Y18 ~ R18,
       Y18 ~ Y17,
       labels = c("Y18" = "Yield\n 2018", 
                  "R18" = "Seeding Rate",
                  "Y17" = "Yield \n2017"),
       #latent = "unhealthy",
       #exposure = "smoking",
       outcome = "Y18")

ggdag(Model2_dag, text = FALSE, use_labels = "label")